Background information on the dataset…
Additional links or sources…
library(tidyverse)
library(tidytuesdayR)
# Load the TidyTuesday release dated 2018-08-28 and pull out the NFL table.
NFL_Data <- tidytuesdayR::tt_load("2018-08-28")
nfl_stats <- NFL_Data$`nfl_2010-2017`
# Column overview. The console output captured when the document was rendered
# is kept below as `#>` comments so this chunk stays valid, re-runnable R.
glimpse(nfl_stats)
#> Rows: 81,525
#> Columns: 23
#> $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
#> $ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
#> $ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
#> $ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
#> $ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
#> $ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
#> $ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
#> $ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
#> $ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
#> $ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
#> $ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
#> $ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
#> $ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
#> $ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
#> $ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
#> $ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
#> $ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
#> $ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 rushing yards, keeping only the player name, the
# rushing yardage and the columns whose names start with "game".
hi_rsh <- nfl_stats %>%
  filter(rush_yds > 200) %>%
  select(name, rush_yds, starts_with("game"))
# Number of such rows per game_year. The table captured at render time is kept
# below as `#>` comments so the chunk remains valid R.
hi_rsh %>%
  count(game_year)
#> # A tibble: 17 × 2
#> game_year n
#> <dbl> <int>
#> 1 2000 10
#> 2 2001 2
#> 3 2002 4
#> 4 2003 3
#> 5 2004 2
#> 6 2005 3
#> 7 2006 4
#> 8 2007 3
#> 9 2008 2
#> 10 2009 5
#> # … with 7 more rows
# Column overview repeated ahead of the passing summary. The captured console
# output is kept as `#>` comments so the call is no longer fused with it.
glimpse(nfl_stats)
#> Rows: 81,525
#> Columns: 23
#> $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
#> $ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
#> $ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
#> $ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
#> $ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
#> $ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
#> $ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
#> $ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
#> $ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
#> $ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
#> $ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
#> $ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
#> $ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
#> $ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
#> $ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
#> $ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
#> $ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
#> $ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 500 passing yards, keeping only the player name, the
# passing yardage and the columns whose names start with "game".
hi_pass <- nfl_stats %>%
  filter(pass_yds > 500) %>%
  select(name, pass_yds, starts_with("game"))
# Number of such rows per game_year. The table captured at render time is kept
# below as `#>` comments so the chunk remains valid R.
hi_pass %>%
  count(game_year)
#> # A tibble: 10 × 2
#> game_year n
#> <dbl> <int>
#> 1 2000 1
#> 2 2006 1
#> 3 2009 1
#> 4 2011 1
#> 5 2012 2
#> 6 2013 1
#> 7 2014 1
#> 8 2015 2
#> 9 2016 2
#> 10 2017 1
# Column overview repeated ahead of the receiving summary. The captured console
# output is kept as `#>` comments so the call is no longer fused with it.
glimpse(nfl_stats)
#> Rows: 81,525
#> Columns: 23
#> $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
#> $ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
#> $ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
#> $ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
#> $ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
#> $ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
#> $ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
#> $ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
#> $ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
#> $ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
#> $ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
#> $ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
#> $ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
#> $ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
#> $ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
#> $ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
#> $ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
#> $ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
#> $ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
#> $ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 receiving yards, keeping only the player name, the
# receiving yardage and the columns whose names start with "game".
# Fix: the original selected `pass_yds` here (copied from the passing chunk),
# so the column that was filtered on never appeared in the result.
hi_rec <- nfl_stats %>%
  filter(rec_yds > 200) %>%
  select(name, rec_yds, starts_with("game"))
# Number of such rows per game_year.
# Fix: the original counted `hi_pass` again instead of `hi_rec`.
hi_rec %>%
  count(game_year)
# NOTE(review): the table below is the output captured from the original
# chunk, i.e. the `hi_pass` counts. It is stale for `hi_rec`; re-render the
# document to replace it.
#> # A tibble: 10 × 2
#> game_year n
#> <dbl> <int>
#> 1 2000 1
#> 2 2006 1
#> 3 2009 1
#> 4 2011 1
#> 5 2012 2
#> 6 2013 1
#> 7 2014 1
#> 8 2015 2
#> 9 2016 2
#> 10 2017 1
Information about the dataset…
Code for creating graphs…
# Six charts, two per stat family (rushing, receiving, passing):
#   * a bubble chart of high-volume games (attempts / receptions), and
#   * a plain scatter of the highest-yardage games.
# Every chart puts game_year on x and player name on y. The label objects
# (`labs_*`) are built separately and added when the plot is printed.
# `labs_grp_bubble`, `ggp2_grp_bubble`, `labs_scatter` and `ggp2_scatter` are
# deliberately re-assigned for each stat family, as in the original chunks.

# ---- Rushing: bubble chart ----
labs_grp_bubble <- labs(
  title = "Rushing Attempts Leaders",
  x = "Year", y = "Name",
  size = "Rushing Yards"
)
# Rows with more than 33 rushing attempts; bubble size tracks rushing yards.
# Fill colour is mapped per player via `fill = name` inside aes(); shape 21 is
# a filled circle, so `color` only sets the black outline.
# The size title comes from labs(size = ...) above; the original also set
# scale_size(name = ...), which duplicated it.
ggp2_grp_bubble <- filter(nfl_stats, rush_att > 33) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    aes(size = rush_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7)) +
  ggthemes::theme_few(base_size = 11)
ggp2_grp_bubble +
  labs_grp_bubble

# ---- Rushing: scatter ----
# Fix: the y label was "name)" (stray parenthesis); axis labels now match the
# bubble charts.
labs_scatter <- labs(
  title = "Highest Rushers",
  x = "Year", y = "Name"
)
# Rows with more than 200 rushing yards.
ggp2_scatter <- filter(nfl_stats, rush_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()
ggp2_scatter +
  labs_scatter

# ---- Receiving: bubble chart ----
# Fix: the title said ">15" while the filter below keeps rec > 12; the title
# now states the threshold actually used.
labs_grp_bubble <- labs(
  title = "Receptions > 12",
  x = "Year", y = "Name",
  size = "Receiving Yards"
)
# Rows with more than 12 receptions.
# Fix: bubbles were sized by `rush_yds` (copied from the rushing chart); they
# are now sized by `rec_yds` to match the chart's subject.
ggp2_grp_bubble <- filter(nfl_stats, rec > 12) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    aes(size = rec_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7)) +
  ggthemes::theme_few()
ggp2_grp_bubble +
  labs_grp_bubble

# ---- Receiving: scatter ----
# Fix: title typo ("Recivers") and stray ")" in the y label.
labs_scatter <- labs(
  title = "Highest Receivers",
  x = "Year", y = "Name"
)
# Rows with more than 200 receiving yards.
ggp2_scatter <- filter(nfl_stats, rec_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()
ggp2_scatter +
  labs_scatter

# ---- Passing: bubble chart ----
labs_grp_bubble <- labs(
  title = "Pass Attempt Leaders",
  x = "Year", y = "Name",
  size = "Passing Yards"
)
# Rows with more than 55 pass attempts.
# Fix: bubbles were sized by `rush_yds` (copied from the rushing chart); they
# are now sized by `pass_yds` to match the chart's subject.
ggp2_grp_bubble <- filter(nfl_stats, pass_att > 55) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    aes(size = pass_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7)) +
  ggthemes::theme_few()
ggp2_grp_bubble +
  labs_grp_bubble

# ---- Passing: scatter ----
# Fix: stray ")" in the y label.
labs_scatter <- labs(
  title = "Highest Passers",
  x = "Year", y = "Name"
)
# Rows with more than 500 passing yards.
ggp2_scatter <- filter(nfl_stats, pass_yds > 500) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()
ggp2_scatter +
  labs_scatter
# Graphs for data…